source("~/Documents/PhD/GlobalDA/code/2_inference_TMB/helper_TMB.R")
source("~/Documents/PhD/CDA_in_Cancer/code/functions/meretricious/pretty_plots/prettySignatures.R")
library(gtools)

## Read the SigProfiler signature-probabilities table (restricted PCAWG data;
## the path is relative to the current working directory).
sigs_pcawg_paper <- read.table(
  "../../data/restricted/pcawg/SigProfilier_PCAWG_WGS_probabilities_SBS.csv",
  sep = ",", header = TRUE
)
# rownames(sigs_pcawg_paper) <- sigs_pcawg_paper[,1]
## Discard columns 1, 3 and 4; the cancer-type column and the per-signature
## columns are kept. NOTE(review): presumably the dropped columns are
## sample / mutation-type annotations -- confirm against the CSV header.
sigs_pcawg_paper[, c(1, 3, 4)] <- NULL

# pheatmap::pheatmap(sigs_pcawg_paper)
## Keep the cancer type of every row *before* building unique row names.
## Recovering it afterwards by stripping the ".<n>" suffix added by
## make.unique() would also truncate any label that itself contains a ".".
ct <- as.character(sigs_pcawg_paper$Cancer.Type)
rownames(sigs_pcawg_paper) <- make.unique(ct)
sigs_pcawg_paper$Cancer.Type <- NULL

## 0/1 matrix: 1 where the value is strictly positive, 0 otherwise
## (row names are not carried over; rows stay aligned with `ct`).
sigs_pcawg_paper_bool <- apply(sigs_pcawg_paper > 0, 2, as.numeric)

## Number of rows per cancer type
table(ct)
## ct
##     Biliary-AdenoCA         Bladder-TCC         Bone-Benign          Bone-Epith 
##                3360                2208                1536                1056 
##      Bone-Osteosarc      Breast-AdenoCA         Breast-DCIS    Breast-LobularCA 
##                3648               19008                 288                1248 
##      Cervix-AdenoCA          Cervix-SCC             CNS-GBM         CNS-Medullo 
##                 192                1728                3936               14016 
##           CNS-Oligo       CNS-PiloAstro    ColoRect-AdenoCA         Eso-AdenoCA 
##                1728                8544                5760                9408 
##            Head-SCC        Kidney-ChRCC          Kidney-RCC           Liver-HCC 
##                5472                4320               13824               31296 
##        Lung-AdenoCA            Lung-SCC          Lymph-BNHL           Lymph-CLL 
##                3648                4608               10272                9120 
##         Myeloid-AML         Myeloid-MDS         Myeloid-MPN       Ovary-AdenoCA 
##                1056                 384                5376               10848 
##        Panc-AdenoCA      Panc-Endocrine       Prost-AdenoCA       Skin-Melanoma 
##               23136                8160               27456               10272 
##  SoftTissue-Leiomyo SoftTissue-Liposarc     Stomach-AdenoCA         Thy-AdenoCA 
##                1440                1824                7200                4608 
##      Uterus-AdenoCA 
##                4896
## Heatmap of the 0/1 matrix restricted to the Biliary-AdenoCA rows
pheatmap::pheatmap(
  sigs_pcawg_paper_bool[ct == "Biliary-AdenoCA", , drop = FALSE]
)

## Get active signatures in each cancer type: a signature (column) is active
## if it is non-zero in at least one row of that cancer type.
## - rows are selected by exact equality rather than grepl(), so a label is
##   never interpreted as a regular expression or matched as a substring;
## - drop = FALSE keeps a matrix even when only one row is selected, which
##   colSums() requires;
## - simplify = FALSE guarantees a named list (one character vector per cancer
##   type) even if every cancer type has the same number of active signatures.
active_per_ct <- sapply(unique(ct), function(ct_it) {
  rows_ct <- which(ct == ct_it)
  n_nonzero <- colSums(sigs_pcawg_paper_bool[rows_ct, , drop = FALSE])
  names(which(n_nonzero > 0))
}, simplify = FALSE)

## The list is named with the hyphenated labels found in `ct`, so the element
## has to be looked up as "Eso-AdenoCA"; `$Eso_AdenoCA` matches no name and
## returns NULL.
active_per_ct[["Eso-AdenoCA"]]
## Read the per-sample signature table (restricted PCAWG data; the path is
## relative to the current working directory).
sigs_pcawg_paper_2 <- read.table(
  "../../data/restricted/pcawg/PCAWG_sigProfiler_SBS_signatures_in_samples.csv",
  sep = ",", header = TRUE
)
## as.character() so that cancer types are always used as names (not factor
## codes) when indexing `active_per_ct` below.
ct2 <- as.character(sigs_pcawg_paper_2$Cancer.Types)
rownames(sigs_pcawg_paper_2) <- paste0(ct2, "-", sigs_pcawg_paper_2$Sample.Names)
## Drop the first three columns, leaving only the signature columns.
sigs_pcawg_paper_2 <- sigs_pcawg_paper_2[, -c(1:3)]

## One barplot per cancer type (alphabetical order), restricted to the
## signatures found active for that cancer type in `active_per_ct`.
## NOTE(review): this assumes the cancer-type labels and signature column
## names agree between the two input files -- confirm.
lapply(sort(unique(ct2)), function(ct_it) {
  ## Exact label match (no regex / substring matching); drop = FALSE keeps
  ## the column name when a cancer type has a single active signature.
  exposures_ct <- sigs_pcawg_paper_2[which(ct2 == ct_it),
                                     active_per_ct[[ct_it]],
                                     drop = FALSE]
  createBarplot(normalise_rw(as(exposures_ct, "matrix"))) +
    ggtitle(ct_it)
})
## Creating plot... it might take some time if the data are large. Number of samples: 35
## Creating plot... it might take some time if the data are large. Number of samples: 23
## Creating plot... it might take some time if the data are large. Number of samples: 16
## Creating plot... it might take some time if the data are large. Number of samples: 11
## Creating plot... it might take some time if the data are large. Number of samples: 38
## Creating plot... it might take some time if the data are large. Number of samples: 198
## Creating plot... it might take some time if the data are large. Number of samples: 3
## Creating plot... it might take some time if the data are large. Number of samples: 13
## Creating plot... it might take some time if the data are large. Number of samples: 2
## Creating plot... it might take some time if the data are large. Number of samples: 18
## Creating plot... it might take some time if the data are large. Number of samples: 41
## Creating plot... it might take some time if the data are large. Number of samples: 146
## Creating plot... it might take some time if the data are large. Number of samples: 18
## Creating plot... it might take some time if the data are large. Number of samples: 89
## Creating plot... it might take some time if the data are large. Number of samples: 60
## Creating plot... it might take some time if the data are large. Number of samples: 98
## Creating plot... it might take some time if the data are large. Number of samples: 57
## Creating plot... it might take some time if the data are large. Number of samples: 45
## Creating plot... it might take some time if the data are large. Number of samples: 144
## Creating plot... it might take some time if the data are large. Number of samples: 326
## Creating plot... it might take some time if the data are large. Number of samples: 38
## Creating plot... it might take some time if the data are large. Number of samples: 48
## Creating plot... it might take some time if the data are large. Number of samples: 107
## Creating plot... it might take some time if the data are large. Number of samples: 95
## Creating plot... it might take some time if the data are large. Number of samples: 11
## Creating plot... it might take some time if the data are large. Number of samples: 4
## Creating plot... it might take some time if the data are large. Number of samples: 56
## Creating plot... it might take some time if the data are large. Number of samples: 113
## Creating plot... it might take some time if the data are large. Number of samples: 241
## Creating plot... it might take some time if the data are large. Number of samples: 85
## Creating plot... it might take some time if the data are large. Number of samples: 286
## Creating plot... it might take some time if the data are large. Number of samples: 107
## Creating plot... it might take some time if the data are large. Number of samples: 15
## Creating plot... it might take some time if the data are large. Number of samples: 19
## Creating plot... it might take some time if the data are large. Number of samples: 75
## Creating plot... it might take some time if the data are large. Number of samples: 48
## Creating plot... it might take some time if the data are large. Number of samples: 51
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

## 
## [[6]]

## 
## [[7]]

## 
## [[8]]

## 
## [[9]]

## 
## [[10]]

## 
## [[11]]

## 
## [[12]]

## 
## [[13]]

## 
## [[14]]

## 
## [[15]]

## 
## [[16]]

## 
## [[17]]

## 
## [[18]]

## 
## [[19]]

## 
## [[20]]

## 
## [[21]]

## 
## [[22]]

## 
## [[23]]

## 
## [[24]]

## 
## [[25]]

## 
## [[26]]

## 
## [[27]]

## 
## [[28]]

## 
## [[29]]

## 
## [[30]]

## 
## [[31]]

## 
## [[32]]

## 
## [[33]]

## 
## [[34]]

## 
## [[35]]

## 
## [[36]]

## 
## [[37]]

# active_sigs <- sapply(unique(ct), function(ct_it){
#    gsub("_.*", "", (names(which(colSums(sigs_pcawg_paper_bool[grepl(ct_it, ct),]) > 0))))
# })

## Build and export a cancer-type x signature 0/1 table of active signatures.
## All signatures active in at least one cancer type, in natural order
## (mixedsort orders embedded numbers numerically).
uniq_sigs <- gtools::mixedsort(unique(unlist(active_per_ct)))

## One row per cancer type, one column per signature; 1 = active, 0 = not.
## Rows are bound explicitly so the result is a cancer-type x signature matrix
## even when there is only a single signature.
active_sigs_tab <- do.call(
  rbind,
  lapply(active_per_ct, function(sigs) as.numeric(uniq_sigs %in% sigs))
)
colnames(active_sigs_tab) <- uniq_sigs
## Row identifiers: upper-case, with any underscore replaced by a hyphen.
rownames(active_sigs_tab) <- toupper(gsub("_", "-", rownames(active_sigs_tab)))

## Repeat the identifier as an explicit first column named "id2"
## (this turns the table into a character matrix).
active_sigs_tab <- cbind(id2 = rownames(active_sigs_tab), active_sigs_tab)

## col.names = NA writes an empty header cell above the row-name column.
write.table(active_sigs_tab, "../../data/cosmic/active_signatures_PCAWGpaper.txt",
            quote = FALSE, sep = "\t", col.names = NA)